Ex_treme's blog.

基于物品的协同过滤算法(ItemCF)

2018/11/20 Share

基本公式

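(原文此处为公式图片,未能保留。根据下文实现代码,物品 $i$ 与物品 $j$ 的相似度为:)

$$ s_{ij} = \frac{|U(i) \cap U(j)|}{\sqrt{|U(i)| \cdot |U(j)|}} $$

其中 $U(i)$ 表示点击过物品 $i$ 的用户集合。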

实现代码

步骤1:导包

1
2
3
4
5
6
7
8
9
10
11
12
13
import math
import sys
import util.reader as reader
import operator

sys.path.append("../util")

def base_contribute_score():
    """
    Return the amount one user adds to the co-occurrence score of an item pair.

    In this baseline ItemCF variant every co-click counts the same.

    :return: the constant contribution, 1
    """
    return 1

步骤2:主流程函数

1
2
3
4
5
6
7
8
9
10
11
12
def main_flow(
    ratings_path="/home/pzs741/PycharmProjects/cd/data/ratings.csv",
    user_id="1",
):
    """
    Run the ItemCF pipeline end to end and print one user's recommendations.

    :param ratings_path: path handed to reader.get_user_click; defaults to the
        location that was originally hard-coded in this function
    :param user_id: key of the user whose recommendations are printed;
        defaults to "1", the user originally hard-coded here
    :return: None
    """
    # User click information, per the original notes shaped like
    # {'1': ['1', '3', '6', '47', '50', '70', ...]}
    user_click = reader.get_user_click(ratings_path)
    # Item similarity information, shaped like
    # {'1': [('780', 0.55), ('3114', 0.54), ('356', 0.53), ...], ...}
    sim_info = cal_item_sim(user_click)
    # Items related to each user's recent clicks, shaped like
    # {'1': {'780': 0.55, '3114': 0.54, '356': 0.53, ...}, ...}
    recom_result = cal_recom_result(sim_info, user_click)
    # .get keeps the demo from crashing with KeyError when the requested user
    # is absent from the ratings data; an empty dict is printed instead.
    print(recom_result.get(user_id, {}))

步骤3:计算物品相似度

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
def cal_item_sim(user_click):
    """
    Compute item-to-item similarity from per-user click lists.

    Every pair of items found in the same user's click list receives
    base_contribute_score() in both directions. The accumulated co-occurrence
    total of a pair (i, j) is then divided by
    sqrt(click_count(i) * click_count(j)).

    :param user_click: dict, key userid, value [itemid1, itemid2, ...]
    :return: dict, key itemid_i, value list of (itemid_j, sim_score) tuples
        sorted by sim_score in descending order. Items that never share a
        click list with another item do not appear as keys.
    """
    co_appear = {}
    item_click_count = {}
    for itemlist in user_click.values():
        for index_i, itemid_i in enumerate(itemlist):
            item_click_count[itemid_i] = item_click_count.get(itemid_i, 0) + 1
            for itemid_j in itemlist[index_i + 1:]:
                # Co-occurrence is symmetric, so credit both directions.
                row_i = co_appear.setdefault(itemid_i, {})
                row_i[itemid_j] = row_i.get(itemid_j, 0) + base_contribute_score()

                row_j = co_appear.setdefault(itemid_j, {})
                row_j[itemid_i] = row_j.get(itemid_i, 0) + base_contribute_score()

    item_sim_score_sorted = {}
    for itemid_i, relate_item in co_appear.items():
        scored = [
            (
                itemid_j,
                co_time / math.sqrt(item_click_count[itemid_i] * item_click_count[itemid_j]),
            )
            for itemid_j, co_time in relate_item.items()
        ]
        # Stable sort: items with equal scores keep their first-seen order.
        scored.sort(key=operator.itemgetter(1), reverse=True)
        item_sim_score_sorted[itemid_i] = scored

    return item_sim_score_sorted

步骤4:计算推荐结果

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
def cal_recom_result(sim_info, user_click, recent_click_num=3, topk=5):
    """
    Recommend items to every user with ItemCF.

    For each user, the first ``recent_click_num`` entries of the click list
    are taken (presumably the most recent clicks; this depends on how the
    caller orders the list). For each of those items, its first ``topk``
    entries in ``sim_info`` are added to the user's recommendations.

    :param sim_info: item sim dict, key itemid, value list of
        (similar_itemid, sim_score) pairs, best match first
    :param user_click: user click dict, key userid, value list of itemids
    :param recent_click_num: how many leading clicks per user to expand
        (default 3, the value previously hard-coded)
    :param topk: how many similar items to take per clicked item
        (default 5, the value previously hard-coded)
    :return: dict, key userid, value dict with key itemid and value recom_score.
        Every user in user_click gets an entry, possibly an empty dict.
    """
    recom_info = {}
    for user, click_list in user_click.items():
        user_recom = recom_info.setdefault(user, {})
        for itemid in click_list[:recent_click_num]:
            if itemid not in sim_info:
                # No similarity data for this item, so nothing to expand.
                continue
            for itemsimid, itemsimscore in sim_info[itemid][:topk]:
                # NOTE(review): when the same candidate is reached from several
                # clicked items the later score overwrites the earlier one
                # (scores are not summed) - confirm this is intended.
                user_recom[itemsimid] = itemsimscore

    return recom_info
CATALOG
  1. 基本公式
  2. 实现代码